knitr::opts_chunk$set(echo = TRUE)
devtools::install_github("cmf-uchicago/cmfproperty")
## WARNING: Rtools is required to build R packages, but is not currently installed.
## 
## Please download and install Rtools 4.0 from https://cran.r-project.org/bin/windows/Rtools/.
## Skipping install of 'cmfproperty' from a github remote, the SHA1 (78054fd9) has not changed since last install.
##   Use `force = TRUE` to force installation
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(DBI)
library(dbplyr)
## 
## 载入程辑包:'dbplyr'
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(cmfproperty)
library(lubridate)
## 
## 载入程辑包:'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
con <- dbConnect(RSQLite::SQLite(), "C:/Users/yitia/Desktop/470/part.1/detroit.sqlite")

DBI::dbListTables(con)
## [1] "assessments"      "blight"           "foreclosures"     "parcels"         
## [5] "parcels_historic" "sales"
sales <- dplyr::tbl(con, 'sales') %>% dplyr::collect()

assessments <- dplyr::tbl(con, 'assessments') %>% dplyr::collect()

blight <- dplyr::tbl(con, 'blight') %>% dplyr::collect()

foreclosures <- dplyr::tbl(con, 'foreclosures') %>% dplyr::collect()

parcels <- dplyr::tbl(con, 'parcels') %>% dplyr::collect()

parcels_historic <- dplyr::tbl(con, 'parcels_historic') %>% dplyr::collect()

dbDisconnect(con)

sales
assessments
blight
foreclosures
parcels
parcels_historic
sales <- sales %>% 
  select(parcel_num, sale_date, sale_price) 
sales$sale_year <-year(sales$sale_date)
sales
assessments <- assessments %>% 
  select(PARCELNO , year, ASSESSEDVALUE)

s1 <-sales %>%
  group_by(sale_year) %>%
  summarise(mean_price = mean(sale_price))
s2 <-sales %>%
  group_by(sale_year) %>%
  summarise(median_price = median(sales$sale_price))
s3 <-assessments %>%
  group_by(year) %>%
  summarise(mean_ASSESSEDVALUE = mean(ASSESSEDVALUE))
s4 <-assessments %>%
  group_by(year) %>%
  summarise(median_ASSESSEDVALUE = median(ASSESSEDVALUE))
join <- s1 %>% 
  left_join(s2)
## Joining, by = "sale_year"
join1 <- join %>% 
  left_join(s3, by=c( "sale_year"="year"))
join2 <- join1 %>% 
  left_join(s4, by=c( "sale_year"="year"))
join2
p <- ggplot(data = join2,
            mapping = aes(
              x = sale_year,
              y = mean_price))
p + geom_line()

p <- ggplot(data = join2,
            mapping = aes(
              x = sale_year,
              y = median_price))
p + geom_line()

p <- ggplot(data = join2,
            mapping = aes(
              x = sale_year,
              y = mean_ASSESSEDVALUE))
p + geom_line()

p <- ggplot(data = join2,
            mapping = aes(
              x = sale_year,
              y = median_ASSESSEDVALUE))
p + geom_line()

#section b
sales <- sales %>% 
  filter(as.numeric(`sale_price`) > 2500)
assessments <- assessments %>% 
  filter(year <= 2020)

joined <- sales %>% 
  left_join(assessments, by=c("parcel_num"="PARCELNO", "sale_year"="year"))

joined <- joined %>%
  select(parcel_num, sale_year, sale_price, ASSESSEDVALUE)


ratios <-
  cmfproperty::reformat_data(
    joined,
    sale_col = "sale_price",
    assessment_col = "ASSESSEDVALUE",
    sale_year_col = "sale_year",
  )
## [1] "Filtered out non-arm's length transactions"
## [1] "Inflation adjusted to 2020"
cmfproperty::make_report(ratios, 
                         jurisdiction_name = "Detroit, Michigan")
## 
## 
## processing file: report.Rmd
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |..                                                                    |   3%
##    inline R code fragments
## 
## 
  |                                                                            
  |....                                                                  |   6%
## label: options (with options) 
## List of 2
##  $ echo     : logi FALSE
##  $ out.width: chr "175px"
## 
## 
  |                                                                            
  |......                                                                |   9%
##   ordinary text without R code
## 
## 
  |                                                                            
  |........                                                              |  11%
## label: setup (with options) 
## List of 1
##  $ include: logi FALSE
## Joining, by = "TAX_YEAR"
## 
  |                                                                            
  |..........                                                            |  14%
##    inline R code fragments
## 
## 
  |                                                                            
  |............                                                          |  17%
## label: mainbinnedscatter
## 
  |                                                                            
  |..............                                                        |  20%
##    inline R code fragments
## 
## 
  |                                                                            
  |................                                                      |  23%
## label: over under bar
## 
  |                                                                            
  |..................                                                    |  26%
##   ordinary text without R code
## 
## 
  |                                                                            
  |....................                                                  |  29%
## label: iaao graphs
## 
  |                                                                            
  |......................                                                |  31%
##    inline R code fragments
## 
## 
  |                                                                            
  |........................                                              |  34%
## label: cod graph
## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 row(s) containing missing values (geom_path).
## 
  |                                                                            
  |..........................                                            |  37%
##    inline R code fragments
## 
## 
  |                                                                            
  |............................                                          |  40%
## label: prd graph
## 
  |                                                                            
  |..............................                                        |  43%
##    inline R code fragments
## 
## 
  |                                                                            
  |................................                                      |  46%
## label: prb graph
## 
  |                                                                            
  |..................................                                    |  49%
##    inline R code fragments
## 
## 
  |                                                                            
  |....................................                                  |  51%
## label: standardstable (with options) 
## List of 1
##  $ results: chr "asis"
## 
## 
  |                                                                            
  |......................................                                |  54%
##   ordinary text without R code
## 
## 
  |                                                                            
  |........................................                              |  57%
## label: results_tbl_values (with options) 
## List of 1
##  $ results: chr "asis"
## 
## 
  |                                                                            
  |..........................................                            |  60%
##   ordinary text without R code
## 
## 
  |                                                                            
  |............................................                          |  63%
## label: all_asr_plot (with options) 
## List of 1
##  $ fig.height: num 7
## 
  |                                                                            
  |..............................................                        |  66%
##   ordinary text without R code
## 
## 
  |                                                                            
  |................................................                      |  69%
## label: sale_ratio_decile_tbl (with options) 
## List of 1
##  $ results: chr "asis"
## 
## 
  |                                                                            
  |..................................................                    |  71%
##   ordinary text without R code
## 
## 
  |                                                                            
  |....................................................                  |  74%
## label: monte carlo
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
## 
  |                                                                            
  |......................................................                |  77%
##   ordinary text without R code
## 
## 
  |                                                                            
  |........................................................              |  80%
## label: monte carlo graphs (with options) 
## List of 1
##  $ fig.height: num 5
## Warning: Removed 1 rows containing missing values (geom_vline).
## Warning: Removed 1 rows containing missing values (geom_vline).

## Warning: Removed 1 rows containing missing values (geom_vline).

## Warning: Removed 1 rows containing missing values (geom_vline).

## Warning: Removed 1 rows containing missing values (geom_vline).
## 
  |                                                                            
  |..........................................................            |  83%
##   ordinary text without R code
## 
## 
  |                                                                            
  |............................................................          |  86%
## label: vertical equity methods (with options) 
## List of 1
##  $ results: chr "asis"
## 
## 
  |                                                                            
  |..............................................................        |  89%
##    inline R code fragments
## 
## 
  |                                                                            
  |................................................................      |  91%
## label: diagnostics (with options) 
## List of 1
##  $ eval: logi FALSE
## 
## 
  |                                                                            
  |..................................................................    |  94%
##    inline R code fragments
## 
## 
  |                                                                            
  |....................................................................  |  97%
## label: diagnostics2 (with options) 
## List of 1
##  $ eval: logi FALSE
## 
## 
  |                                                                            
  |......................................................................| 100%
##   ordinary text without R code
## output file: report.knit.md
## "C:/Program Files/RStudio/bin/pandoc/pandoc" +RTS -K512m -RTS report.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc1b0c6e164e69.html --lua-filter "C:\Users\yitia\Documents\R\win-library\4.1\rmarkdown\rmarkdown\lua\pagebreak.lua" --lua-filter "C:\Users\yitia\Documents\R\win-library\4.1\rmarkdown\rmarkdown\lua\latex-div.lua" --self-contained --variable bs3=TRUE --standalone --section-divs --table-of-contents --toc-depth 2 --variable toc_float=1 --variable toc_selectors=h1,h2 --variable toc_collapsed=1 --variable toc_smooth_scroll=1 --variable toc_print=1 --template "C:\Users\yitia\Documents\R\win-library\4.1\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --number-sections --variable theme=sandstone --include-in-header "C:\Users\yitia\AppData\Local\Temp\RtmpWIQTEi\rmarkdown-str1b0c44831079.html" --mathjax --variable "mathjax-url:https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" --citeproc
## 
## Output created: Detroit, Michigan.html
## [1] "~~~~~~~~~~~~"
## [1] "geom warnings can be disregarded"
## [1] "Report created at C:/Users/yitia/Desktop/470/part.1/Detroit, Michigan.html"
stats <- cmfproperty::calc_iaao_stats(ratios)
head(stats)
binned <- 
  cmfproperty::binned_scatter(
    ratios,
    min_reporting_yr = 2011,
    max_reporting_yr = 2020,
    jurisdiction_name = "Detroit, Michigan"
  )

print(binned[[1]])
## [1] "In 2020, the most expensive homes (the top decile) were assessed at 13.9% of their value and the least expensive homes (the bottom decile) were assessed at 6.8%. In other words, the least expensive homes were assessed at <b>0.49 times</b> the rate applied to the most expensive homes. Across our sample from 2011 to 2020, the most expensive homes were assessed at 15.3% of their value and the least expensive homes were assessed at 229.5%, which is <b>14.97 times</b> the rate applied to the most expensive homes."
binned[[2]]

pct_over <-
  cmfproperty::pct_over_under(
    ratios,
    min_reporting_yr = 2011,
    max_reporting_yr = 2020,
    jurisdiction_name = "Detroit, Michigan"
  )
## Joining, by = "TAX_YEAR"
print(pct_over[[1]])
## [1] "In Detroit, Michigan, <b>72%</b> of the lowest value homes are overassessed and <b>5%</b> of the highest value homes are overassessed."
pct_over[[2]]

#section 3
summary_info <-
  cmfproperty::regression_tests(ratios, produce_table = TRUE)
## 
## <table style="text-align:center"><tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3">Dependent Variable</td></tr>
## <tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td>ASSESSED_VALUE</td><td>log(ASSESSED_VALUE)</td><td>RATIO</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
## <tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">SALE_PRICE</td><td>0.02<sup>***</sup></td><td></td><td>-0.0000<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.0004)</td><td></td><td>(0.0000)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">log(SALE_PRICE)</td><td></td><td>0.05<sup>***</sup></td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.003)</td><td></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Constant</td><td>15,815.31<sup>***</sup></td><td>8.86<sup>***</sup></td><td>1.19<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(46.94)</td><td>(0.03)</td><td>(0.004)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
## <tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>112,731</td><td>112,731</td><td>112,731</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.03</td><td>0.003</td><td>0.05</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.03</td><td>0.003</td><td>0.05</td></tr>
## <tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
kableExtra::kable(summary_info)
Model Value Test T Statistic Conclusion Model Description
paglin72 15815.3136120 > 0 336.89233 Regressive AV ~ SP
cheng74 0.0529021 < 1 19.75547 Regressive ln(AV) ~ ln(SP)
IAAO78 -0.0000023 < 0 -77.10270 Regressive RATIO ~ SP
kochin82 0.0652176 < 1 19.75547 Regressive ln(SP) ~ ln(AV)
bell84 15048.4638366 > 0 307.98776 Regressive AV ~ SP + SP^2
0.0000000 < 0 -50.34653 Regressive AV ~ SP + SP^2
sunderman90 -6345.3808588 > 0 -27.12189 Progressive AV ~ SP + low + high + low * SP + high * SP
#section 4
foreclosures <- foreclosures %>% 
  select(c(`2002`,`2003`,`2004`,`2006`,`2007`,`2008`,`2009`,`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`,`2018`,`2019`))
foreclosures[is.na(foreclosures)] <- 0
f <- colSums(foreclosures)
f
##  2002  2003  2004  2006  2007  2008  2009  2010  2011  2012  2013  2014  2015 
##   246  2484  2034  5108  2252  4111  7986 11632 12999 20040 18785 23845 24340 
##  2016  2017  2018  2019 
## 12590  6289  3880  2748
frame <- data.frame(
  year = c(2002,2003,2004,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019),
           foreclosures_num =c(246,2484, 2034,5108,2252,4111,7986,11632,12999,20040,18785,23845,24340,12590,6289,3880,2748))
frame
join_f<- ratios %>%
left_join(frame, by=c( "SALE_YEAR"="year"))
join_f
m1 <- lm(RATIO ~ foreclosures_num, data=join_f)
summary(m1)
## 
## Call:
## lm(formula = RATIO ~ foreclosures_num, data = join_f)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5384 -0.6979 -0.3060  0.3041  6.6066 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      3.869e-01  7.269e-03   53.22   <2e-16 ***
## foreclosures_num 4.731e-05  4.464e-07  105.99   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.219 on 114468 degrees of freedom
##   (因为不存在,3270个观察量被删除了)
## Multiple R-squared:  0.08937,    Adjusted R-squared:  0.08936 
## F-statistic: 1.123e+04 on 1 and 114468 DF,  p-value: < 2.2e-16